from urllib import urlencode

from scrapy import log
from scrapy.conf import settings
from scrapy.core.exceptions import IgnoreRequest

from httplib2 import Http


# Shared HTTP client used for every call to the Django API.
h = Http()
# Base URL of the Django server that exposes the crawl-state API
# (e.g. '/api/is_page_crawled', '/api/crawled_page').
SERVER = settings['DJANGO_SERVER']


def _is_page_crawled(url):
    """Ask the Django server whether `url` has recently been crawled.

    POSTs the url to SERVER's ``/api/is_page_crawled`` endpoint and maps
    the reply: ``'yes'`` -> True (page is fresh, skip it), ``'old'`` or
    ``'no'`` -> False (page should be crawled).

    Raises Exception on a non-200 response or an unrecognized reply body.
    """
    api_url = '%s/api/is_page_crawled' % SERVER
    body = urlencode({'url': url})
    response, content = h.request(api_url, method='POST', body=body)
    if response.status != 200:
        raise Exception("Http error while checking if page is up to date: %s" % body)
    if content == 'yes':
        return True
    if content in ('old', 'no'):
        return False
    # Include the unexpected reply so the failure can actually be diagnosed;
    # the original message gave no hint of what the server returned.
    raise Exception("Unknown page state: %r" % content)


class NoDupsDownloaderMiddleware(object):
    """Scrapy downloader middleware that drops already-crawled requests.

    Before each download it asks the Django server whether the page has
    recently been crawled; if so, the request is aborted with
    ``IgnoreRequest``. Otherwise the page is immediately recorded as
    crawled on the server and the request proceeds.
    """

    def process_request(self, request, spider):
        """Abort the request if the page is fresh; otherwise mark it crawled.

        Returning None (implicitly) lets Scrapy continue processing the
        request through the remaining middlewares.
        """
        if _is_page_crawled(request.url):
            log.msg("Ignoring request because it's recently been crawled: %s" % request.url, log.DEBUG)
            raise IgnoreRequest()
        # NOTE(review): the page is marked crawled at request time, before
        # any response arrives -- a failed download still counts as crawled.
        # Confirm this is intentional; a response-side hook would be safer.
        self._crawled_page(request.url)

    def _crawled_page(self, url):
        """Record `url` as crawled via SERVER's ``/api/crawled_page`` endpoint.

        Raises Exception if the server responds with a non-200 status.
        """
        api_url = '%s/api/crawled_page' % SERVER
        body = urlencode({'url': url})
        response, content = h.request(api_url, method='POST', body=body)
        if response.status != 200:
            raise Exception("Http error while marking page as crawled: %s" % body)
